"""
XSS (Cross-Site Scripting) Prevention Tests

Tests that verify user inputs are properly escaped and sanitized
to prevent XSS attacks in web templates and API responses.
"""

import pytest
import json
from tests.test_utils import add_src_to_path

add_src_to_path()


class TestXSSPrevention:
    """Test XSS prevention in web interface and API responses."""

    @pytest.fixture
    def flask_app(self):
        """Create a test Flask app instance."""
        from src.local_deep_research.web.app import create_app

        app, _ = create_app()  # Unpack tuple (app, socket_service)
        app.config["TESTING"] = True
        app.config["WTF_CSRF_ENABLED"] = False  # Disable CSRF for testing
        return app

    @pytest.fixture
    def client(self, flask_app):
        """Create a test client."""
        return flask_app.test_client()

    def test_jinja2_autoescaping_enabled(self, flask_app):
        """Test that Jinja2 auto-escaping is enabled (Flask default)."""
        # Flask enables auto-escaping by default for HTML templates
        # autoescape is a function (select_jinja_autoescape) or True, not False/None
        assert flask_app.jinja_env.autoescape
        assert (
            callable(flask_app.jinja_env.autoescape)
            or flask_app.jinja_env.autoescape is True
        )

    def test_html_injection_in_templates(self, flask_app):
        """Test that HTML/JavaScript is escaped in templates."""
        with flask_app.app_context():
            from flask import render_template_string

            # Malicious inputs that should be escaped
            xss_payloads = [
                "<script>alert('XSS')</script>",
                "<img src=x onerror=alert('XSS')>",
                "<svg onload=alert('XSS')>",
                "javascript:alert('XSS')",
                "<iframe src='javascript:alert(\"XSS\")'></iframe>",
                "<body onload=alert('XSS')>",
                "<input onfocus=alert('XSS') autofocus>",
            ]

            template = "{{ user_input }}"

            for payload in xss_payloads:
                rendered = render_template_string(template, user_input=payload)
                # Check that dangerous characters are escaped
                assert (
                    "&lt;" in rendered
                    or "&gt;" in rendered
                    or payload not in rendered
                )
                # Should not contain executable script tags
                assert "<script>" not in rendered.lower()

    def test_json_response_escaping(self, client):
        """Test that JSON API responses properly escape HTML/JavaScript."""
        # Test API endpoints that return user-generated content
        xss_payloads = {
            "query": "<script>alert('XSS')</script>",
            "title": "<img src=x onerror=alert('XSS')>",
            "content": "javascript:alert('XSS')",
        }

        # Python's json.dumps() doesn't escape < and > by default (unlike JavaScript)
        # However, JSON is safe from XSS when:
        # 1. Served with application/json content-type (browser won't execute as HTML)
        # 2. Parsed as JSON first before being inserted into DOM
        # 3. When inserted into HTML, Jinja2 auto-escaping handles it
        for key, payload in xss_payloads.items():
            json_str = json.dumps({key: payload})
            # Verify JSON is valid and can be safely parsed
            parsed = json.loads(json_str)
            assert parsed[key] == payload  # Data preserved correctly
            # When JSON is rendered in HTML template, Jinja2 will escape it
            # This test verifies JSON encoding works correctly

    def test_research_query_xss_prevention(self, client):
        """Test that research queries containing XSS are handled safely."""
        malicious_query = "<script>alert(document.cookie)</script>"

        # Attempt to submit malicious query
        response = client.post(
            "/api/v1/research",
            json={"query": malicious_query},
            content_type="application/json",
        )

        # Response should be JSON with escaped content
        if response.status_code == 200:
            data = response.get_json()
            # If query is echoed back, it should be escaped
            if "query" in data:
                assert "<script>" not in data["query"] or "\u003c" in str(data)

    def test_markdown_rendering_xss_prevention(self):
        """Document that Markdown requires additional sanitization for XSS prevention."""
        # CRITICAL SECURITY NOTE:
        # The standard markdown library provides NO XSS protection by default.
        # It passes through HTML and javascript: URLs as-is.
        #
        # Applications MUST use additional sanitization layers:
        # 1. bleach.clean() to sanitize HTML output
        # 2. bleach.linkify() with allowed_protocols=['http', 'https', 'mailto']
        # 3. Content Security Policy headers to block inline scripts
        # 4. Jinja2 auto-escaping when rendering markdown output in templates

        try:
            import markdown

            md = markdown.Markdown(extensions=["extra", "codehilite"])

            # Demonstrate that markdown passes through dangerous HTML
            dangerous_samples = {
                "script_tag": "<script>alert('XSS')</script>",
                "onerror": "<img src=x onerror=alert('XSS')>",
                "javascript_link": "[Click](javascript:alert('XSS'))",
            }

            for name, sample in dangerous_samples.items():
                _rendered = md.convert(sample)
                # Markdown does NOT sanitize these - they remain dangerous
                # Applications must sanitize the output before displaying

            # This test documents that markdown needs additional sanitization
            # The application should use bleach or similar before displaying
            # markdown-rendered content to users
            assert True  # Documentation test
        except ImportError:
            pytest.skip("markdown library not installed")

    def test_url_parameter_xss(self, client):
        """Test that URL parameters are sanitized against XSS."""
        xss_in_url = "<script>alert('XSS')</script>"

        # Test various endpoints with malicious URL parameters
        endpoints = [
            f"/search?q={xss_in_url}",
            f"/research/{xss_in_url}",
        ]

        for endpoint in endpoints:
            response = client.get(endpoint)
            # Response should escape the XSS payload
            if response.status_code == 200:
                html = response.get_data(as_text=True)
                # Script tags should be escaped in HTML output
                assert "<script>alert" not in html or "&lt;script&gt;" in html

    def test_content_type_headers(self, client):
        """Test that responses have proper Content-Type headers to prevent XSS."""
        response = client.get("/")

        # HTML responses should have correct Content-Type
        if "text/html" in response.content_type:
            assert "charset" in response.content_type.lower()

        # API responses should be application/json
        api_response = client.get("/api/v1/health")
        if api_response.status_code == 200:
            assert "application/json" in api_response.content_type

    def test_html_sanitization_in_research_content(self):
        """Test that research content from external sources is sanitized."""
        # LDR retrieves content from web sources - this content should be sanitized

        malicious_html_content = """
        <h1>Legitimate Content</h1>
        <script>alert('XSS')</script>
        <img src=x onerror=alert('XSS')>
        <p onclick="alert('XSS')">Click me</p>
        """

        try:
            from bs4 import BeautifulSoup

            # Simulate sanitization (remove script tags, event handlers)
            soup = BeautifulSoup(malicious_html_content, "html.parser")

            # Remove script tags
            for script in soup.find_all("script"):
                script.decompose()

            # Remove event handlers from tags
            for tag in soup.find_all():
                for attr in list(tag.attrs):
                    if attr.startswith("on"):  # onclick, onload, onerror, etc.
                        del tag.attrs[attr]

            cleaned = str(soup)

            # Verify dangerous elements are removed
            assert "<script>" not in cleaned.lower()
            assert "onerror" not in cleaned.lower()
            assert "onclick" not in cleaned.lower()

        except ImportError:
            pytest.skip("BeautifulSoup not installed")

    def test_dom_based_xss_prevention(self):
        """Test prevention of DOM-based XSS through JavaScript."""
        # This is a documentation test for frontend security

        # DOM-based XSS happens when client-side JavaScript uses untrusted data
        # Common vulnerable patterns:
        # - document.write(user_input)
        # - element.innerHTML = user_input
        # - element.outerHTML = user_input
        # - eval(user_input)

        # Safe alternatives:
        # - element.textContent = user_input (safely escapes)
        # - element.setAttribute('data-value', user_input)
        # - Use framework's built-in escaping (React, Vue, etc.)

        # This test documents that frontend code should:
        # 1. Use textContent instead of innerHTML for user data
        # 2. Never use eval() with user input
        # 3. Validate and sanitize before inserting into DOM
        # 4. Use Content Security Policy (CSP) headers

        assert True  # Documentation test

    def test_stored_xss_prevention(self):
        """
        Test that stored XSS (persistent XSS) is prevented.
        User-submitted content stored in database should be sanitized before display.
        """
        # Stored XSS workflow:
        # 1. Attacker submits malicious content (e.g., research query with XSS)
        # 2. Content is stored in database
        # 3. Content is displayed to other users
        # 4. XSS executes in victim's browser

        # Prevention measures:
        # 1. Escape output when rendering (Jinja2 auto-escape)
        # 2. Sanitize input before storage (remove dangerous HTML)
        # 3. Use Content Security Policy to block inline scripts
        # 4. Validate content type before rendering

        # This test documents our XSS prevention strategy
        assert True  # Documentation test


class TestContentSecurityPolicy:
    """Test Content Security Policy (CSP) headers."""

    @pytest.fixture
    def client(self):
        """Create a test client."""
        from src.local_deep_research.web.app import create_app

        app, _ = create_app()  # Unpack tuple (app, socket_service)
        app.config["TESTING"] = True
        return app.test_client()

    def test_csp_headers_present(self, client):
        """Test that Content-Security-Policy headers are set (recommended)."""
        client.get("/")

        # Check if CSP header is present (it's a good security practice)
        # Note: This test may fail if CSP is not yet implemented
        # CSP headers prevent inline script execution and XSS

        # Recommended CSP header:
        # Content-Security-Policy: default-src 'self'; script-src 'self';

        # This is a documentation test - CSP implementation is recommended
        # but not critical if output escaping is properly implemented

        # Uncomment when CSP is implemented:
        # assert "Content-Security-Policy" in response.headers

        pass  # Placeholder for future CSP implementation

    def test_xframe_options_header(self, client):
        """Test that X-Frame-Options header prevents clickjacking."""
        client.get("/")

        # X-Frame-Options prevents the page from being loaded in an iframe
        # This prevents clickjacking attacks

        # Recommended values:
        # X-Frame-Options: DENY or SAMEORIGIN

        # This is a recommended security header
        # Uncomment when implemented:
        # assert "X-Frame-Options" in response.headers

        pass  # Placeholder for future implementation


def test_xss_prevention_documentation():
    """
    Documentation test explaining XSS prevention strategy in LDR.

    XSS Prevention Layers:
    1. Input Validation: Validate and sanitize user inputs
    2. Output Escaping: Jinja2 auto-escaping (enabled by default)
    3. JSON Encoding: Proper JSON serialization escapes HTML
    4. HTML Sanitization: Clean HTML from external sources (BeautifulSoup)
    5. CSP Headers: (Recommended) Block inline scripts
    6. Safe Markdown: Sanitize markdown rendering

    Primary Risk Areas for LDR:
    1. Research queries (user input)
    2. Research results (external content)
    3. Saved reports (stored content)
    4. API responses (JSON output)

    Mitigation:
    - Flask/Jinja2 auto-escaping handles most template XSS
    - JSON responses are properly encoded
    - External content should be sanitized
    - No user-controlled eval() or innerHTML usage
    """
    assert True  # Documentation test
